{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# Collect the 'Close' series of every CSV in the working directory into one\n",
    "# wide frame: one column per stock (named after the file's first 'Name'\n",
    "# value), indexed by 'Date'.\n",
    "dirpath = os.getcwd()\n",
    "\n",
    "# os.listdir() returns entries in arbitrary order and the first CSV read fixes\n",
    "# the reference dates and row count, so sort to make the result reproducible.\n",
    "csv_names = sorted(entry for entry in os.listdir(dirpath) if entry.endswith(\".csv\"))\n",
    "if not csv_names:\n",
    "    raise FileNotFoundError(\"no .csv files found in \" + dirpath)\n",
    "\n",
    "# Gather the columns in a dict and build the frame once; inserting columns\n",
    "# one at a time into a DataFrame fragments it when there are many of them.\n",
    "columns = {}\n",
    "n_rows = None\n",
    "for filename in csv_names:\n",
    "    df = pd.read_csv(os.path.join(dirpath, filename))\n",
    "    stock_name = df['Name'].iloc[0]\n",
    "    if n_rows is None:\n",
    "        columns['Date'] = df['Date'].tolist()\n",
    "        n_rows = len(df)\n",
    "    # Rows are matched by position, so only files with the reference row\n",
    "    # count are kept; the others are skipped.\n",
    "    # NOTE(review): equal length does not prove equal dates -- confirm that\n",
    "    # all kept files cover the same trading days.\n",
    "    if len(df) == n_rows:\n",
    "        columns[stock_name] = df['Close'].tolist()\n",
    "\n",
    "combined_df = pd.DataFrame(columns).set_index('Date')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Take an independent copy (as is done for sp100_df) so that in-place changes\n",
    "# to one frame can never leak into the other.\n",
    "sp500_df = combined_df.copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ticker symbols to pick out of combined_df; symbols without a matching\n",
    "# column are dropped by the filter below.\n",
    "sp100 = [\n",
    "    \"AAPL\", \"ABBV\", \"ABT\", \"ACN\", \"AGN\", \"AIG\", \"ALL\", \"AMGN\", \"AMZN\", \"AXP\",\n",
    "    \"BA\", \"BAC\", \"BIIB\", \"BK\", \"BLK\", \"BMY\", \"C\", \"CAT\", \"CELG\", \"CL\",\n",
    "    \"CMCSA\", \"COF\", \"COP\", \"COST\", \"CSCO\", \"CVS\", \"CVX\", \"DD\", \"DHR\", \"DIS\",\n",
    "    \"DOW\", \"DUK\", \"EMC\", \"EMR\", \"EXC\", \"F\", \"FB\", \"FDX\", \"FOX\", \"FOXA\",\n",
    "    \"GD\", \"GE\", \"GILD\", \"GM\", \"GOOG\", \"GOOGL\", \"GS\", \"HAL\", \"HD\", \"HON\",\n",
    "    \"IBM\", \"INTC\", \"JNJ\", \"JPM\", \"KMI\", \"KO\", \"LLY\", \"LMT\", \"LOW\", \"MA\",\n",
    "    \"MCD\", \"MDLZ\", \"MDT\", \"MET\", \"MMM\", \"MO\", \"MON\", \"MRK\", \"MS\", \"MSFT\",\n",
    "    \"NEE\", \"NKE\", \"ORCL\", \"OXY\", \"PCLN\", \"PEP\", \"PFE\", \"PG\", \"PM\", \"PYPL\",\n",
    "    \"QCOM\", \"RTN\", \"SBUX\", \"SLB\", \"SO\", \"SPG\", \"T\", \"TGT\", \"TWX\", \"TXN\",\n",
    "    \"UNH\", \"UNP\", \"UPS\", \"USB\", \"USD\", \"UTX\", \"V\", \"VZ\", \"WBA\", \"WFC\",\n",
    "]\n",
    "\n",
    "# Filter in list order so the columns of sp100_df follow the order above.\n",
    "new_sp100 = [ticker for ticker in sp100 if ticker in combined_df.columns]\n",
    "\n",
    "sp100_df = combined_df[new_sp100].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                  AAPL       ABBV        ABT         ACN         AGN  \\\n",
      "Date                                                                   \n",
      "2014-10-29  107.339996  59.980000  42.740002   79.550003  238.779999   \n",
      "2014-10-30  106.980003  61.180000  43.490002   80.620003  243.210007   \n",
      "2014-10-31  108.000000  63.459999  43.590000   81.120003  242.740005   \n",
      "2014-11-03  109.400002  63.250000  43.290001   80.970001  247.500000   \n",
      "2014-11-04  108.599998  62.660000  43.610001   81.150002  245.320007   \n",
      "...                ...        ...        ...         ...         ...   \n",
      "2019-10-23  243.179993  77.750000  81.269997  184.070007  175.000000   \n",
      "2019-10-24  243.580002  76.800003  81.360001  185.000000  173.929993   \n",
      "2019-10-25  246.580002  76.529999  81.650002  183.070007  173.839996   \n",
      "2019-10-28  249.050003  78.330002  82.660004  182.869995  174.220001   \n",
      "2019-10-29  243.289993  78.470001  83.449997  184.800003  174.639999   \n",
      "\n",
      "                  AIG         ALL        AMGN         AMZN         AXP  ...  \\\n",
      "Date                                                                    ...   \n",
      "2014-10-29  52.360001   62.980000  158.880005   294.119995   88.339996  ...   \n",
      "2014-10-30  52.930000   63.910000  161.580002   299.070007   88.739998  ...   \n",
      "2014-10-31  53.570000   64.849998  162.179993   305.459991   89.949997  ...   \n",
      "2014-11-03  53.799999   64.720001  161.559998   305.720001   90.849998  ...   \n",
      "2014-11-04  53.400002   64.889999  160.410004   302.809998   91.379997  ...   \n",
      "...               ...         ...         ...          ...         ...  ...   \n",
      "2019-10-23  53.150002  108.180000  203.320007  1762.170044  116.489998  ...   \n",
      "2019-10-24  52.900002  108.260002  203.059998  1780.780029  116.410004  ...   \n",
      "2019-10-25  52.799999  107.239998  203.050003  1761.329956  118.260002  ...   \n",
      "2019-10-28  52.549999  106.599998  205.009995  1777.079956  118.580002  ...   \n",
      "2019-10-29  52.099998  108.220001  208.990005  1762.709961  117.419998  ...   \n",
      "\n",
      "                   TXN         UNH         UNP         UPS        USB  \\\n",
      "Date                                                                    \n",
      "2014-10-29   48.330002   92.959999  114.970001  102.620003  41.840000   \n",
      "2014-10-30   47.529999   93.879997  115.000000  103.290001  42.139999   \n",
      "2014-10-31   49.660000   95.010002  116.449997  104.910004  42.599998   \n",
      "2014-11-03   50.080002   94.860001  115.260002  105.639999  42.919998   \n",
      "2014-11-04   50.400002   94.980003  115.279999  107.080002  43.160000   \n",
      "...                ...         ...         ...         ...        ...   \n",
      "2019-10-23  118.949997  247.800003  170.029999  115.150002  56.580002   \n",
      "2019-10-24  118.410004  244.509995  170.389999  114.010002  56.310001   \n",
      "2019-10-25  120.510002  244.910004  172.330002  115.720001  56.740002   \n",
      "2019-10-28  120.000000  247.050003  170.750000  116.250000  57.020000   \n",
      "2019-10-29  120.290001  252.289993  170.190002  116.910004  57.340000   \n",
      "\n",
      "                   UTX           V         VZ        WBA        WFC  \n",
      "Date                                                                 \n",
      "2014-10-29  105.849998   53.665001  49.830002  62.830002  52.169998  \n",
      "2014-10-30  106.339996   59.162498  49.900002  62.959999  52.459999  \n",
      "2014-10-31  107.000000   60.357498  50.250000  64.220001  53.090000  \n",
      "2014-11-03  106.300003   60.384998  50.389999  64.980003  53.369999  \n",
      "2014-11-04  106.879997   60.742500  50.330002  65.239998  53.270000  \n",
      "...                ...         ...        ...        ...        ...  \n",
      "2019-10-23  139.910004  171.320007  60.880001  54.560001  50.930000  \n",
      "2019-10-24  143.440002  176.160004  60.580002  54.450001  51.099998  \n",
      "2019-10-25  142.960007  177.850006  60.369999  55.419998  51.570000  \n",
      "2019-10-28  143.039993  179.839996  60.180000  55.799999  51.650002  \n",
      "2019-10-29  142.000000  177.630005  60.189999  56.270000  52.169998  \n",
      "\n",
      "[1259 rows x 90 columns]\n"
     ]
    }
   ],
   "source": [
    "# Plain-text preview of the frame holding the selected ticker columns.\n",
    "print(sp100_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "def create_np(df, name, window=30):\n",
    "    \"\"\"Save windowed Pearson correlation matrices of ``df`` as ``.npy`` files.\n",
    "\n",
    "    The rows of ``df`` are cut into consecutive, non-overlapping chunks of\n",
    "    ``window`` rows (the last chunk may be shorter) and the column-by-column\n",
    "    Pearson correlation is computed for each chunk.\n",
    "\n",
    "    Three files are written with ``np.save`` (which appends ``.npy``):\n",
    "\n",
    "    * ``<name>_corr``  - the stacked correlation matrices, one per chunk\n",
    "    * ``<name>_dates`` - the index label of each chunk's first row, converted\n",
    "      with ``pd.to_datetime``\n",
    "    * ``<name>_nodes`` - the column labels of ``df``\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df : pandas.DataFrame\n",
    "        Frame whose columns are correlated; its index labels must be\n",
    "        parseable by ``pd.to_datetime``.\n",
    "    name : str\n",
    "        Prefix of the output file names.\n",
    "    window : int, optional\n",
    "        Number of rows per chunk. Defaults to 30.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If ``window`` is smaller than 1.\n",
    "    \"\"\"\n",
    "    if window < 1:\n",
    "        raise ValueError('window must be a positive integer')\n",
    "\n",
    "    corr_tensor = []\n",
    "    dates = []\n",
    "    # iloc slicing clamps at the end of the frame, so the trailing chunk\n",
    "    # simply comes out shorter than the others.\n",
    "    for start in range(0, len(df), window):\n",
    "        sub_df = df.iloc[start:start + window]\n",
    "        corr_tensor.append(sub_df.corr(method='pearson').to_numpy())\n",
    "        dates.append(df.index.values[start])\n",
    "\n",
    "    corr_tensor = np.asarray(corr_tensor)\n",
    "    # The dates are stored as an object array of Timestamps (column labels are\n",
    "    # usually object dtype as well), so reading those two files back with\n",
    "    # np.load requires allow_pickle=True.\n",
    "    dates = np.asarray([pd.to_datetime(x) for x in dates])\n",
    "    nodes = np.asarray(df.columns.values)\n",
    "\n",
    "    np.save(name + '_corr', corr_tensor)\n",
    "    np.save(name + '_dates', dates)\n",
    "    np.save(name + '_nodes', nodes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the correlation windows, window start dates and column names of\n",
    "# sp100_df to .npy files whose names start with 'sp100'.\n",
    "create_np(sp100_df, 'sp100')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Same export for sp500_df, written to .npy files whose names start with\n",
    "# 'sp500'.\n",
    "create_np(sp500_df, 'sp500')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
